library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(patchwork)
#install.packages("remotes")
#install.packages(
#  "aniMotum",
#  repos = c(
#    "https://cloud.r-project.org",
#    "https://ianjonsen.r-universe.dev"
#  ),
#  dependencies = TRUE
#)

library(aniMotum)
## Warning: package 'aniMotum' was built under R version 4.5.2
## 
## Attaching package: 'aniMotum'
## 
## The following object is masked from 'package:purrr':
## 
##     map

1 Data

sharks <- read_csv("final_whale_shark.csv")
## Rows: 12173 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): lc, sat
## dbl  (3): id, lat, lon
## dttm (1): date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
sharks %>% 
  count(id)
## # A tibble: 60 × 2
##        id     n
##     <dbl> <int>
##  1 108093    30
##  2 108096     4
##  3 108097    66
##  4 108106     2
##  5 108113    71
##  6 108114     2
##  7 112557   167
##  8 112560    18
##  9 112562     8
## 10 112563    23
## # ℹ 50 more rows
sharks <- sharks %>%
  mutate(
    date = stringr::str_replace(date, "-\\s+", "-"),
    date = ymd_hms(date)
  )
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `date = ymd_hms(date)`.
## Caused by warning:
## !  15 failed to parse.
# Check number of observations per individual 
sharks %>% 
    count(id) %>% 
    arrange(desc(n))
## # A tibble: 60 × 2
##        id     n
##     <dbl> <int>
##  1 164969  1655
##  2 203645   894
##  3 184025   833
##  4 220364   687
##  5 172246   546
##  6 172241   541
##  7 203646   537
##  8 172239   514
##  9 157790   505
## 10 175950   485
## # ℹ 50 more rows
# Summary of Argos location classes
sharks %>% count(lc)
## # A tibble: 7 × 2
##   lc        n
##   <chr> <int>
## 1 0       458
## 2 1      1216
## 3 2      2378
## 4 3      3825
## 5 A       788
## 6 B      3489
## 7 Z        19
sharks <- sharks %>%  
  arrange(id,date)
sharks <- sharks %>%
  filter(!is.na(date))

# Largest time gap data check 
sharks %>% 
  filter (id == "172246")
## # A tibble: 544 × 6
##        id   lat   lon date                lc    sat  
##     <dbl> <dbl> <dbl> <dttm>              <chr> <chr>
##  1 172246  1.63 -92.3 2017-09-27 16:44:00 2     UT   
##  2 172246  1.65 -92.3 2017-09-28 01:25:00 B     UT   
##  3 172246  1.65 -92.3 2017-09-28 02:50:00 B     UT   
##  4 172246  1.56 -92.4 2017-09-28 14:08:00 B     UT   
##  5 172246  1.50 -92.5 2017-09-28 15:33:00 B     UT   
##  6 172246  1.58 -92.6 2017-09-28 16:17:00 3     UT   
##  7 172246  1.57 -92.6 2017-09-28 17:06:00 2     UT   
##  8 172246  1.49 -92.9 2017-09-29 12:43:00 B     UT   
##  9 172246  1.48 -92.9 2017-09-29 13:25:00 B     UT   
## 10 172246  1.47 -93.0 2017-09-29 16:06:00 2     UT   
## # ℹ 534 more rows
sharks %>% 
  filter (id == "157790")
## # A tibble: 503 × 6
##        id   lat    lon date                lc    sat  
##     <dbl> <dbl>  <dbl> <dttm>              <chr> <chr>
##  1 157790 1.40   -99.5 2017-07-18 13:02:00 2     UT   
##  2 157790 1.40   -99.5 2017-07-18 13:04:00 2     UT   
##  3 157790 1.40   -99.5 2017-07-18 13:08:00 2     UT   
##  4 157790 1.40   -99.5 2017-07-18 14:42:00 B     UT   
##  5 157790 0.915 -101.  2017-07-20 17:11:00 0     UT   
##  6 157790 0.916 -101.  2017-07-20 17:59:00 B     UT   
##  7 157790 0.774 -101.  2017-07-21 17:34:00 1     UT   
##  8 157790 0.654 -102.  2017-07-22 16:32:00 B     UT   
##  9 157790 0.623 -103.  2017-07-23 15:26:00 A     UT   
## 10 157790 0.623 -103.  2017-07-23 16:07:00 A     UT   
## # ℹ 493 more rows
sharks %>% 
  filter (id =="203637")
## # A tibble: 26 × 6
##        id   lat   lon date                lc    sat  
##     <dbl> <dbl> <dbl> <dttm>              <chr> <chr>
##  1 203637  47.6 -122. 2020-06-12 23:55:28 1     NP   
##  2 203637  47.6 -122. 2020-06-13 00:33:58 B     NK   
##  3 203637  47.7 -122. 2020-06-13 01:35:58 3     NP   
##  4 203637  47.7 -122. 2020-06-13 01:42:58 B     MA   
##  5 203637  47.7 -122. 2020-06-13 01:50:58 2     SR   
##  6 203637  47.7 -122. 2020-06-13 02:12:37 2     NK   
##  7 203637  47.7 -122. 2020-06-13 03:11:28 2     NN   
##  8 203637  47.7 -122. 2020-06-13 03:16:58 2     NP   
##  9 203637  47.7 -122. 2020-06-13 03:20:28 3     MA   
## 10 203637  47.7 -122. 2020-06-13 03:32:58 3     SR   
## # ℹ 16 more rows
sharks %>% 
  filter (id =="203638")
## # A tibble: 15 × 6
##        id    lat    lon date                lc    sat  
##     <dbl>  <dbl>  <dbl> <dttm>              <chr> <chr>
##  1 203638 47.7   -122.  2020-06-12 23:55:08 2     NP   
##  2 203638 47.7   -122.  2020-06-13 00:37:38 0     NK   
##  3 203638 47.7   -122.  2020-06-13 01:36:08 3     NP   
##  4 203638 47.7   -122.  2020-06-13 01:43:53 1     MA   
##  5 203638 47.7   -122.  2020-06-13 01:52:08 3     SR   
##  6 203638 47.7   -122.  2020-06-13 02:13:08 3     NK   
##  7 203638 47.7   -122.  2020-06-13 03:11:08 2     NN   
##  8 203638 47.7   -122.  2020-06-13 03:18:08 2     NP   
##  9 203638 47.7   -122.  2020-06-13 03:20:08 3     MA   
## 10 203638 47.7   -122.  2020-06-13 03:32:38 3     SR   
## 11 203638 47.7   -122.  2020-06-13 03:45:08 3     MC   
## 12 203638 47.7   -122.  2020-06-13 03:51:08 3     NK   
## 13 203638 43.8   -123.  2020-06-14 01:25:45 Z     SR   
## 14 203638 -0.740  -90.3 2020-08-11 15:50:07 3     MC   
## 15 203638 -0.741  -90.3 2020-08-11 15:55:11 3     NN
sharks %>% 
  filter (id =="203639")
## # A tibble: 19 × 6
##        id    lat    lon date                lc    sat  
##     <dbl>  <dbl>  <dbl> <dttm>              <chr> <chr>
##  1 203639 47.7   -122.  2020-06-12 23:54:24 2     NP   
##  2 203639 47.7   -122.  2020-06-13 00:34:54 0     NK   
##  3 203639 47.7   -122.  2020-06-13 01:36:24 2     NP   
##  4 203639 47.7   -122.  2020-06-13 01:42:54 B     MA   
##  5 203639 47.7   -122.  2020-06-13 01:52:24 3     SR   
##  6 203639 47.7   -122.  2020-06-13 02:13:04 3     NK   
##  7 203639 47.7   -122.  2020-06-13 03:12:24 3     NN   
##  8 203639 47.7   -122.  2020-06-13 03:16:54 3     NP   
##  9 203639 47.7   -122.  2020-06-13 03:20:44 3     MA   
## 10 203639 47.7   -122.  2020-06-13 03:32:54 3     SR   
## 11 203639 47.7   -122.  2020-06-13 03:45:24 3     MC   
## 12 203639 47.7   -122.  2020-06-13 03:52:39 3     NK   
## 13 203639 -0.738  -90.3 2020-08-11 15:50:17 3     MC   
## 14 203639 -0.739  -90.3 2020-08-11 15:55:32 3     NN   
## 15 203639  1.63   -92.0 2020-08-17 14:03:43 0     NK   
## 16 203639  1.64   -92.0 2020-08-17 14:35:48 1     NN   
## 17 203639  1.64   -92.0 2020-08-17 14:36:03 B     MB   
## 18 203639  1.64   -92.0 2020-08-17 15:02:35 1     MA   
## 19 203639  1.65   -92.0 2020-08-17 15:34:10 1     MC
sharks %>% 
  filter (id =="203644")
## # A tibble: 17 × 6
##        id    lat    lon date                lc    sat  
##     <dbl>  <dbl>  <dbl> <dttm>              <chr> <chr>
##  1 203644 47.7   -122.  2020-06-12 23:54:50 2     NP   
##  2 203644 47.7   -122.  2020-06-13 00:38:20 0     NK   
##  3 203644 47.7   -122.  2020-06-13 01:35:50 3     NP   
##  4 203644 47.7   -122.  2020-06-13 01:43:50 0     MA   
##  5 203644 47.7   -122.  2020-06-13 01:51:20 3     SR   
##  6 203644 47.7   -122.  2020-06-13 02:12:29 3     NK   
##  7 203644 47.7   -122.  2020-06-13 03:11:20 3     NN   
##  8 203644 47.7   -122.  2020-06-13 03:16:20 3     MA   
##  9 203644 47.7   -122.  2020-06-13 03:16:50 3     NP   
## 10 203644 47.7   -122.  2020-06-13 03:32:50 3     SR   
## 11 203644 47.7   -122.  2020-06-13 03:45:20 3     MC   
## 12 203644 47.7   -122.  2020-06-13 03:52:00 3     NK   
## 13 203644 -0.738  -90.3 2020-08-11 15:50:23 3     MC   
## 14 203644 -0.741  -90.3 2020-08-11 15:55:25 3     NN   
## 15 203644  1.33   -91.3 2020-08-19 16:02:09 0     NN   
## 16 203644  1.39   -91.8 2020-08-19 16:03:18 0     MA   
## 17 203644  1.39   -91.8 2020-08-19 16:24:40 B     MC
sharks %>% 
  filter (id =="203646")
## # A tibble: 537 × 6
##        id    lat    lon date                lc    sat  
##     <dbl>  <dbl>  <dbl> <dttm>              <chr> <chr>
##  1 203646 47.7   -122.  2019-09-08 14:00:00 2     SR   
##  2 203646 47.7   -122.  2019-09-08 15:17:00 3     NP   
##  3 203646 47.7   -122.  2019-09-08 15:28:00 3     NK   
##  4 203646 47.7   -122.  2019-09-08 15:37:00 2     SR   
##  5 203646 47.7   -122.  2019-09-08 15:45:00 3     NN   
##  6 203646 47.7   -122.  2019-09-08 17:08:00 2     NK   
##  7 203646 47.7   -122.  2019-09-08 17:10:00 3     MA   
##  8 203646 47.7   -122.  2019-09-08 17:25:00 2     NN   
##  9 203646 47.7   -122.  2019-09-08 17:44:00 2     MC   
## 10 203646 -0.738  -90.3 2020-08-11 15:49:30 3     MC   
## # ℹ 527 more rows
# Split for individuals with large temporal gaps - 172246 and 157790
# Also with 5 sharks that have huge movement that is "across the continent" after the MPM interpolation, which doesn't make sense biologically, so I headed back here at first to split these 5 sharks observations in advance. 

split_table <- tibble(
  id = c(
    "172246",
    "157790",
    "203637",
    "203638",
    "203639",
    "203644",
    "203646"
  ),
  split_time = ymd_hms(c(
    "2017-09-30 16:21:00",
    "2018-03-28 22:30:00",
    "2020-06-13 03:50:58",
    "2020-06-14 01:25:45",
    "2020-06-13 03:52:39",
    "2020-06-13 03:52:00",
    "2019-09-08 17:44:00"
    
  ))
)

2 EDA

2.1 Observation time gap between sharks

# Compute time gaps between consecutive observations within each individual
sharks_dt <- sharks %>%
  arrange(id, date) %>%
  group_by(id) %>%
  mutate(
    dt_hours = as.numeric(difftime(date, lag(date), units = "hours"))
  ) %>%
  ungroup()

# Time gap statistics per individual
sharks_dt %>%
  group_by(id) %>%
  summarise(
    n_obs = n(),
    mean_dt = mean(dt_hours, na.rm = TRUE),
    median_dt = median(dt_hours, na.rm = TRUE),
    max_dt = max(dt_hours, na.rm = TRUE)
  ) %>%
  arrange(desc(max_dt))
## Warning: There were 3 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `max_dt = max(dt_hours, na.rm = TRUE)`.
## ℹ In group 16: `id = 131988`.
## Caused by warning in `max()`:
## ! no non-missing arguments to max; returning -Inf
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 2 remaining warnings.
## # A tibble: 60 × 5
##        id n_obs mean_dt median_dt max_dt
##     <dbl> <int>   <dbl>     <dbl>  <dbl>
##  1 172246   544   49.0      0.788 25638.
##  2 157790   503   56.2      1.43  15434.
##  3 203646   537   16.4      0.456  8110.
##  4 112557   167   42.0      1.30   6018.
##  5 139119     5  853.     215.     2984.
##  6 175953   440    7.55     0.601  2314.
##  7 172241   541    4.60     0.806  1818.
##  8 139127    12  260.      22.1    1747.
##  9 139121     7  319.      68.6    1458.
## 10 203637    26   63.1      0.642  1428.
## # ℹ 50 more rows
sharks_dt %>%
  summarise(
    min_hours = min(dt_hours, na.rm = TRUE),
    q1_hours = quantile(dt_hours, 0.25, na.rm = TRUE),
    median_hours = median(dt_hours, na.rm = TRUE),
    mean_hours = mean(dt_hours, na.rm = TRUE),
    q3_hours = quantile(dt_hours, 0.75, na.rm = TRUE),
    max_hours = max(dt_hours, na.rm = TRUE)
  )
## # A tibble: 1 × 6
##   min_hours q1_hours median_hours mean_hours q3_hours max_hours
##       <dbl>    <dbl>        <dbl>      <dbl>    <dbl>     <dbl>
## 1         0    0.365        0.773       12.2     1.68    25638.

The median of the consecutive time gap is 0.73 hours, but the maximum gap is around 2.9 years, where the tracking window has a huge gap. Raw Argos telemetry exhibited highly irregular temporal sampling, with frequent short-interval transmissions interspersed with multi-month to multi-year gaps.

# Time gap histogram 
ggplot(sharks_dt, aes(x = dt_hours)) +
  geom_histogram(bins = 60) +
  scale_x_log10() +
  theme_bw() +
  labs(
    x = "Time gap between observations (hours, log scale)",
    y = "Count"
  )
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## Warning: Removed 61 rows containing non-finite outside the scale range
## (`stat_bin()`).

sharks <- sharks %>%
  mutate(id = as.character(id))

sharks <- sharks %>%
  left_join(split_table, by = "id") %>%
  mutate(
    id = case_when(
      is.na(split_time) ~ id,
      date <= split_time ~ paste0(id, "_a"),
      date >  split_time ~ paste0(id, "_b")
    )
  ) %>%
  select(-split_time)

sharks %>% 
  count(id)
## # A tibble: 67 × 2
##    id         n
##    <chr>  <int>
##  1 108093    30
##  2 108096     4
##  3 108097    66
##  4 108106     2
##  5 108113    71
##  6 108114     2
##  7 112557   167
##  8 112560    18
##  9 112562     8
## 10 112563    23
## # ℹ 57 more rows

Initially, the raw df contains 60 sharks, but since two of them have long time gap which caused non-convergence in the first attempt of MPM, and 5 of the sharks’ movement track contains the “over-continent” track, which doesn’t make sense biologically, so these sharks are split into _a and _b to make further analysis. Therefore, with 7 sharks split to _a and _b, 67 sharks are remained.

# Raw Spatial tracks for each individual(points)
ggplot(sharks, aes(x = lon, y = lat)) +
  geom_point(alpha = 0.4) +
  facet_wrap(~id) +
  theme_bw()

# Raw Spatial tracks for each individual(path)
ggplot(sharks, aes(x = lon, y = lat)) +
  geom_path(alpha = 0.4) +
  facet_wrap(~id) +
  theme_bw()
## `geom_path()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_path()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_path()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

# Check for id = 203637
# Geom_point
p1 <- ggplot() +
  theme_bw() +
  geom_point(
    data = sharks %>% dplyr::filter(id == "203637_a"),
    aes(x = lon, y = lat)
  )

print(p1)

# Geom_path
p2 <- ggplot() +
  theme_bw() +
  geom_path(
    data = sharks %>% dplyr::filter(id == "203637_a"),
    aes(x = lon, y = lat)
  )

print(p2)

2.2 Velocity check

# Stepwise speed between consecutive observations
sharks_speed <- sharks_dt %>%
  mutate(
    dist_m = geosphere::distHaversine(
      cbind(lon, lat),
      cbind(lag(lon), lag(lat))
    ),
    speed_ms = dist_m / (dt_hours * 3600)
  )
ggplot(sharks_speed, aes(x = speed_ms)) +
  geom_histogram(bins = 60) +
  scale_x_log10() +
  theme_bw() +
  labs(x = "Raw speed (m/s, log scale)")
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## Warning: Removed 63 rows containing non-finite outside the scale range
## (`stat_bin()`).

# proportion of raw steps exceeding 2.7m/s
sharks_speed %>%
  summarise(
    prop_over_2_7 = mean(speed_ms > 2.7, na.rm = TRUE)
  )
## # A tibble: 1 × 1
##   prop_over_2_7
##           <dbl>
## 1         0.123

Approximately 12% of observed movement steps exceeded a biologically informed maximum swimming speed of 2.7 m/s.

2.3 Sanity check

Since the SSM model needs at least 3 points to fit, there are 9 sharks with less than 3 observations, which will be excluded from our further analysis.

# Exclude individuals with fewer than three observations 
sharks %>%
  count(id) %>%
  filter(n < 3)
## # A tibble: 9 × 2
##   id           n
##   <chr>    <int>
## 1 108106       2
## 2 108114       2
## 3 131988       1
## 4 132070       2
## 5 132073       2
## 6 139120       1
## 7 139122       1
## 8 203638_b     2
## 9 220357       2
sharks_ssm <- sharks %>%
  group_by(id) %>%
  filter(n() >= 3) %>%
  ungroup()

sharks_ssm %>% 
  count(id)
## # A tibble: 58 × 2
##    id         n
##    <chr>  <int>
##  1 108093    30
##  2 108096     4
##  3 108097    66
##  4 108113    71
##  5 112557   167
##  6 112560    18
##  7 112562     8
##  8 112563    23
##  9 120701   211
## 10 120702    46
## # ℹ 48 more rows

58 sharks remaining in the sharks_ssm dataframe.

# Sanity check 

# whether date contains invalid data 
sharks_ssm %>%
  summarise(
    n_na_date = sum(is.na(date)),
    n_inf_date = sum(!is.finite(as.numeric(date)))
  )
## # A tibble: 1 × 2
##   n_na_date n_inf_date
##       <int>      <int>
## 1         0          0
# Check whether lon / lat contains invalid data
sharks_ssm %>%
  summarise(
    n_na_lon = sum(is.na(lon)),
    n_na_lat = sum(is.na(lat))
  )
## # A tibble: 1 × 2
##   n_na_lon n_na_lat
##      <int>    <int>
## 1        0        0
# Check whether time is monotonically increasing 
sharks_ssm %>%
  group_by(id) %>%
  summarise(
    any_non_increasing_time = any(diff(date) <= 0, na.rm = TRUE)
  ) %>%
  filter(any_non_increasing_time)
## # A tibble: 1 × 2
##   id     any_non_increasing_time
##   <chr>  <lgl>                  
## 1 203645 TRUE

For the sharks that have >= 3 observations, there are 0 observations of NA date as they were initially filted out. There are also non_increasing_time observations, which will be excluded.

# Check problem ids observations
problem_ids <- sharks_ssm %>%
  group_by(id) %>%
  summarise(any_non_increasing_time = any(diff(date) <= 0, na.rm = TRUE)) %>%
  filter(any_non_increasing_time) %>%
  pull(id)
# Duplicated rows
sharks_ssm %>%
  filter(id %in% problem_ids) %>%
  count(id, date) %>%
  filter(n > 1) %>%
  arrange(desc(n))
## # A tibble: 1 × 3
##   id     date                    n
##   <chr>  <dttm>              <int>
## 1 203645 2020-09-16 04:23:41     2

1 duplicated entries found, will be removed.

2.4 Cleaning dataset for the SSM

# Final cleaned dataset used for state-space modelling

sharks_ssm_clean <- sharks %>%
  # remove rows with invalid time or coordinates
  filter(
    !is.na(date),
    is.finite(as.numeric(date)),
    !is.na(lon),
    !is.na(lat)
  ) %>%
  
  # order observations within each individual
  arrange(id, date) %>%
  
  # remove duplicated timestamps within individuals
  distinct(id, date, .keep_all = TRUE) %>%
  
  # ensure each individual has enough data and strictly increasing time
  group_by(id) %>%
  filter(
    n() >= 3,
    all(diff(date) > 0)
  ) %>%
  ungroup()

In sharks_ssm_clean, all 58 sharks have at least three valid observations with valid timestamps and locations.

# Sanity check for clean data 
nrow(sharks)
## [1] 12158
nrow(sharks_ssm_clean)
## [1] 12142
length(unique(sharks$id))
## [1] 67
length(unique(sharks_ssm_clean$id))
## [1] 58
sharks_ssm_clean %>%
  group_by(id) %>%
  summarise(any_bad_time = any(diff(date) <= 0)) %>%
  filter(any_bad_time)
## # A tibble: 0 × 2
## # ℹ 2 variables: id <chr>, any_bad_time <lgl>

There are 58 sharks (12142 entries) remaining.

3 SSM Model Fit

# State-space model settings:
# - model: correlated random walk (crw)
# - time.step: 24 hours (daily regularization)
# - vmax: 2.7 m/s (biologically informed maximum swimming speed)

sharks_crw_ssm <- fit_ssm(
  x = sharks_ssm_clean,
  model = "crw",
  time.step = 24,     
  vmax = 2.7,          
  control = ssm_control(
    verbose = 0       
  )
)
## Warning in pf_sda_filter(x, spdf, vmax, ang, distlim): 
## trip::sda produced an error on id 132071 using trip::speedfilter instead
## Warning in pf_sda_filter(x, spdf, vmax, ang, distlim): 
## trip::sda produced an error on id 139123 using trip::speedfilter instead
## Warning in sqrt(as.numeric(object$diag.cov.random)): NaNs produced
## Warning in sqrt(as.numeric(object$diag.cov.random)): NaNs produced
## Warning in sqrt(as.numeric(object$diag.cov.random)): NaNs produced
# Extract fitted locations (at observation times)
sharks_fitted <- grab(sharks_crw_ssm, what = "fitted") %>%
  as_tibble()

# Extract predicted locations (regular 24 h interval)
sharks_predicted <- grab(sharks_crw_ssm, what = "predicted") %>%
  as_tibble()
# Dataframe with convergence indicators 
sharks_crw_ssm
## # A tibble: 58 × 5
##    id     ssm          converged pdHess pmodel
##    <chr>  <named list> <lgl>     <lgl>  <chr> 
##  1 108093 <ssm [15]>   TRUE      TRUE   crw   
##  2 108096 <ssm [15]>   TRUE      TRUE   crw   
##  3 108097 <ssm [15]>   TRUE      TRUE   crw   
##  4 108113 <ssm [15]>   TRUE      TRUE   crw   
##  5 112557 <ssm [15]>   TRUE      TRUE   crw   
##  6 112560 <ssm [15]>   TRUE      TRUE   crw   
##  7 112562 <ssm [15]>   TRUE      TRUE   crw   
##  8 112563 <ssm [15]>   TRUE      TRUE   crw   
##  9 120701 <ssm [15]>   TRUE      TRUE   crw   
## 10 120702 <ssm [15]>   TRUE      TRUE   crw   
## # ℹ 48 more rows
# Identify individuals with non-converged SSM fits
nonconv_ids <- sharks_crw_ssm %>%
filter(!converged | !pdHess) %>%
pull(id)

nonconv_ids
## [1] "172246_b"

One individuals (“172246_b”) did not converge.

# Inspect observation histories of non-converged individuals
sharks_ssm_clean %>% 
  filter(id == "172246_b")
## # A tibble: 529 × 6
##    id         lat   lon date                lc    sat  
##    <chr>    <dbl> <dbl> <dttm>              <chr> <chr>
##  1 172246_b -1.14 -91.5 2017-10-08 17:02:08 1     MA   
##  2 172246_b -1.11 -91.6 2017-10-08 20:13:45 1     NP   
##  3 172246_b -1.11 -91.6 2017-10-08 21:49:35 3     NP   
##  4 172246_b -1.10 -91.6 2017-10-08 22:58:34 3     SR   
##  5 172246_b -1.10 -91.6 2017-10-08 23:18:30 2     NK   
##  6 172246_b -1.09 -91.7 2017-10-09 00:35:55 3     SR   
##  7 172246_b -1.09 -91.7 2017-10-09 00:54:48 3     NK   
##  8 172246_b -1.09 -91.7 2017-10-09 01:01:39 3     NN   
##  9 172246_b -1.08 -91.7 2017-10-09 02:24:28 3     MA   
## 10 172246_b -1.08 -91.7 2017-10-09 02:38:31 3     NN   
## # ℹ 519 more rows

2017 jumped to 2020, which is probably the reason of non-convergence.

# Retain SSM fits that converged with a positive-definite Hessian
ssm_valid <- sharks_crw_ssm %>%
filter(converged, pdHess)

# Record IDs excluded due to non-convergence
ssm_excluded_ids <- sharks_crw_ssm %>%
filter(!converged | !pdHess) %>%
pull(id)

ssm_excluded_ids
## [1] "172246_b"

After excluding non-converged fits, 57 sharks remained for further analysis.

# Extract fitted and predicted locations from valid SSM fits only
sharks_fitted <- grab(ssm_valid, what = "fitted") %>%
as_tibble()

sharks_predicted <- grab(ssm_valid, what = "predicted") %>%
as_tibble()

3.1 Visualization of model fit

To illustrate model behavior under different data conditions, predicted tracks were visualized for a small number of representative individuals.

# Individual with the minimum number of observations required for SSM fitting 
three_obs_id <- "132071"

plot(
ssm_valid |> dplyr::filter(id == three_obs_id),
what = "predicted",
type = 1
)
## $`132071`
## Warning: `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.

plot(
ssm_valid |> dplyr::filter(id == three_obs_id),
what = "predicted",
type = 2
)
## $`132071`
## Warning: `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.

Individuals with extremely sparse observations showed high positional uncertainty and were not used for behavioural interpretation.

# Individual with one of the largest temporal gaps between observations
large_gap_id <- "112557"

plot(
ssm_valid |> dplyr::filter(id == large_gap_id),
what = "predicted",
type = 1
)
## $`112557`
## Warning: `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.

plot(
ssm_valid |> dplyr::filter(id == large_gap_id),
what = "predicted",
type = 2
)
## $`112557`
## Warning: `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.

sharks_ssm_clean %>% 
  filter(id == "112557")
## # A tibble: 167 × 6
##    id       lat   lon date                lc    sat  
##    <chr>  <dbl> <dbl> <dttm>              <chr> <chr>
##  1 112557  5.01 -86.2 2012-12-07 01:26:12 B     NL   
##  2 112557  5.05 -86.3 2012-12-07 16:07:17 B     MA   
##  3 112557  5.10 -86.2 2012-12-08 15:41:49 B     MA   
##  4 112557  5.07 -86.2 2012-12-08 18:02:57 0     NP   
##  5 112557  5.26 -86.1 2012-12-08 20:20:05 0     NN   
##  6 112557  5.32 -86.1 2012-12-08 22:45:34 B     NK   
##  7 112557  5.40 -86.0 2012-12-09 15:14:03 0     MA   
##  8 112557  5.39 -86.0 2012-12-09 15:15:59 1     NL   
##  9 112557  5.24 -86.1 2012-12-09 16:58:47 0     MA   
## 10 112557  5.27 -86.0 2012-12-09 22:20:06 B     NK   
## # ℹ 157 more rows

For this shark (id =112557), the shark has only 54 observations in 2012, with a gap of almost one year, followed by the observation almost one year, where the one year in between have huge uncertainties according to the graph.

# Individual with a more typical observation pattern
typical_id <- "164968"

plot(
ssm_valid |> dplyr::filter(id == typical_id),
what = "predicted",
type = 1
)
## $`164968`
## Warning: `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.

plot(
ssm_valid |> dplyr::filter(id == typical_id),
what = "predicted",
type = 2
)
## $`164968`
## Warning: `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.

This individual shows a more typical observation pattern, with stable reconstructed tracks and moderate uncertainty.

# Typical 
ty_id <- "203645"

plot(
ssm_valid |> dplyr::filter(id == ty_id),
what = "predicted",
type = 1
)
## $`203645`
## Warning: `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.

plot(
ssm_valid |> dplyr::filter(id == ty_id),
what = "predicted",
type = 2
)
## $`203645`
## Warning: `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.

Visualization shark track on a spatial shapefile:

# Visualize predicted track for a representative individual
aniMotum::map(
ssm_valid |> dplyr::filter(id == "164968"),
what = "predicted"
)
## using map scale: 10

# Compared with initial raw data
raw_164968 <- ggplot() +
  theme_bw() +
  geom_point(
    data = sharks %>% dplyr::filter(id == "164968"),
    aes(x = lon, y = lat)
  )

print(raw_164968)

All 57 shark tracks were visualized on a spatial shapefile:

# Visualize predicted tracks for all converged individuals
aniMotum::map(
ssm_valid,
what = "predicted"
)
## using map scale: 10

4 SSM diagnostics

The diagnostics are applied to the above three sharks, with 1. time series plot 2. QQ plot 3. ACF

# SSM diagnostics for representative individuals
# - sparse observations
# - large temporal gaps
# - typical observation pattern

diagnostic_ids <- c("132071", "112557", "164968")

ssm_diag <- ssm_valid |>
dplyr::filter(id %in% diagnostic_ids)

# OSAR (one step ahead residuals)
res_diag <- osar(ssm_diag)

# Time series residuals 
plot(
res_diag,
type = "ts",
pages = 1
)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : span too small.  fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 1.4414e+09
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 31812
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 4.4986e+06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : span too small.  fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 1.4414e+09
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 31812
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 4.4986e+06

# QQ plot 
plot(
res_diag,
type = "qq",
pages = 1
)
## Warning: `label` cannot be a <ggplot2::element_blank> object.
## Warning: `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.

#ACF
plot(
res_diag,
type = "acf",
pages = 1
)

Overall, the diagnostic plots do not raise major concerns about model fit. Residuals show no clear trends over time, their distribution is broadly consistent with model assumptions, and there is little remaining autocorrelation. This suggests the fitted state-space model adequately captures the movement dynamics in the data.

Note: Since 175953 and 203646_b failed to converge in MPM in later steps, their diagnostics are checked

# Additional SSM diagnostics for individuals that later failed MPM convergence
diagnostic_ids_MPM_failed <- c("175953", "203646_b")

ssm_diag_MPM_failed <- ssm_valid |>
dplyr::filter(id %in% diagnostic_ids_MPM_failed)

# OSAR (One-step-ahead residuals)
res_diag_MPM_failed <- osar(ssm_diag_MPM_failed)

# Time series residuals 
plot(
res_diag_MPM_failed,
type = "ts",
pages = 1
)

# QQ plot 
plot(
res_diag_MPM_failed,
type = "qq",
pages = 1
)
## Warning: `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.

#ACF
plot(
res_diag_MPM_failed,
type = "acf",
pages = 1
)

For the shark with id = 175953, the maximum time gap between consecutive observations is 2313.52 hours. Despite this long gap, the SSM diagnostics do not show clear violations of model assumptions. The residuals are generally centered around zero without strong temporal trends, the QQ plots show only mild tail deviations, and the ACF plots for both x and y mostly fall within the confidence bounds, indicating that temporal dependence is adequately captured by the CRW model.

For the shark with id = 203646_b, the SSM diagnostics for this individual do not show major violations of model assumptions. Residuals are centred around zero with no strong temporal trends, their distribution broadly follows model expectations, and there is little remaining autocorrelation in either spatial dimension. These results suggest that the CRW-based state-space model provides an adequate description of the movement process for this track.

5 MPM (Move Persistence Model)

# Prepare input data for MPM
# Retain individuals with at least 20 predicted locations
pmm_data <- sharks_predicted %>%
dplyr::group_by(id) %>%
dplyr::filter(dplyr::n() >= 20) %>%
dplyr::ungroup()

# Fit move persistence model
pmm_fit <- fit_mpm(
pmm_data,
model = "mpm",
control = mpm_control(verbose = 0)
)
## Warning in sqrt(diag(object$cov.fixed)): NaNs produced
## Warning in sqrt(as.numeric(object$diag.cov.random)): NaNs produced
## Warning in sqrt(diag(object$cov.fixed)): NaNs produced
## 
## Error in nlminb(obj$par, ifelse(control$verbose == 1, myfn, obj$fn), obj$gr,  : 
##   NA/NaN gradient evaluation
## Warning in sqrt(diag(object$cov.fixed)): NaNs produced
## 
## 
## 
## 
## Error in nlminb(obj$par, ifelse(control$verbose == 1, myfn, obj$fn), obj$gr,  : 
##   NA/NaN gradient evaluation
# Basic visual check of fitted MPM output
plot(pmm_fit, pages = 1)
## Warning: `label` cannot be a <ggplot2::element_blank> object.
## Warning: `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## Warning: Removed 68 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.
## `label` cannot be a <ggplot2::element_blank> object.

# Dataframe with indicators of convergence of MPM
pmm_fit
## # A tibble: 33 × 4
##    id       mpm          converged model
##    <chr>    <named list> <lgl>     <chr>
##  1 112557   <mpm [8]>    TRUE      mpm  
##  2 131986   <mpm [8]>    TRUE      mpm  
##  3 132074   <mpm [8]>    TRUE      mpm  
##  4 139119   <mpm [8]>    TRUE      mpm  
##  5 139121   <mpm [8]>    TRUE      mpm  
##  6 139124   <mpm [8]>    TRUE      mpm  
##  7 139125   <mpm [8]>    TRUE      mpm  
##  8 139127   <mpm [8]>    TRUE      mpm  
##  9 151677   <mpm [8]>    TRUE      mpm  
## 10 157790_a <mpm [8]>    TRUE      mpm  
## # ℹ 23 more rows

After excluding individuals with fewer than 20 predicted locations, 33 sharks were retained for MPM fitting. Among these, the model did not converge for two individuals (175953 and 203646_b), which were excluded from subsequent analyses.

6 Data Extraction and preparation

# Extract fitted move persistence estimates
mpm_gamma <- grab(pmm_fit, what = "fitted") %>%
as_tibble()

# Check structure
mpm_gamma %>% glimpse()
## Rows: 3,568
## Columns: 5
## $ id         <chr> "112557", "112557", "112557", "112557", "112557", "112557",…
## $ date       <dttm> 2012-12-07 01:00:00, 2012-12-08 01:00:00, 2012-12-09 01:00…
## $ logit_g    <dbl> 2.8361115, 2.8361115, 2.8361115, 1.9693592, -2.0727940, 3.0…
## $ logit_g.se <dbl> 5.8625925, 4.5908193, 2.7894898, 1.1587468, 1.0118099, 1.16…
## $ g          <dbl> 0.94459631, 0.94459631, 0.94459631, 0.87754226, 0.11176936,…
# Combine latent locations with movement persistence
latent_movement <- pmm_data %>%
select(
id, date, lon, lat,
x, y, x.se, y.se
) %>%
left_join(
mpm_gamma %>% select(id, date, g),
by = c("id", "date")
)

# Sanity check: missing movement persistence values

latent_movement %>%
summarise(
n_total = n(),
n_missing_gamma = sum(is.na(g))
)
## # A tibble: 1 × 2
##   n_total n_missing_gamma
##     <int>           <int>
## 1    3739             171
# Convert latent movement dataset to sf object (WGS84)
latent_sf <- latent_movement %>%
st_as_sf(coords = c("lon", "lat")) %>%
st_set_crs(4326)

7 Mapping and exploratory visualization

# Time series of movement persistence (gamma_t) by individual

ggplot(latent_movement, aes(x = date, y = g)) +
geom_line(alpha = 0.6) +
facet_wrap(~ id, scales = "free_x") +
theme_bw() +
labs(
x = "Date",
y = expression(gamma[t])
)

# Spatial visualization of tracks colored by movement persistence

# Load base map
world <- rnaturalearth::ne_countries(
scale = "medium",
returnclass = "sf"
)

# Projection
prj <- "+proj=laea +lat_0=0 +lon_0=0 +datum=WGS84 +units=m +no_defs"

ggplot() +
theme_bw() +
geom_sf(data = world, fill = "grey90", colour = "grey70") +
geom_sf(
data = latent_sf,
aes(colour = g),
size = 0.7,
alpha = 0.9
) +
scale_colour_viridis_c(
name = expression(gamma[t]),
limits = c(0, 1)
) +
coord_sf(crs = prj) +
facet_wrap(~ id)

# Exploratory labeling of movement persistence values (used for visualization only; not treated as discrete states)

latent_movement <- latent_movement %>%
mutate(
behaviour_class = case_when(
g < 0.3 ~ "low_persistence",
g > 0.7 ~ "high_persistence",
TRUE ~ "intermediate"
)
)

latent_movement %>%
count(behaviour_class)
## # A tibble: 3 × 2
##   behaviour_class      n
##   <chr>            <int>
## 1 high_persistence  2986
## 2 intermediate       650
## 3 low_persistence    103
# Retain individuals with at least one valid persistence estimate
latent_valid <- latent_movement %>%
  group_by(id) %>%
  filter(any(!is.na(g))) %>%
  ungroup()

# Descriptive summaries of movement persistence by individual
behaviour_summary <- latent_valid %>%
  group_by(id) %>%
  summarise(
    n_points = n(),
    mean_g   = mean(g, na.rm = TRUE),
    sd_g     = sd(g, na.rm = TRUE),
    range_g  = max(g, na.rm = TRUE) - min(g, na.rm = TRUE),
    lon_range = max(lon) - min(lon),
    lat_range = max(lat) - min(lat)
  ) %>%
  ungroup()

# Identify candidates with relatively typical movement persistence (used to select representative individuals for visualization)
typical_candidates <- behaviour_summary %>%
  filter(
    mean_g > quantile(mean_g, 0.4, na.rm = TRUE),
    mean_g < quantile(mean_g, 0.6, na.rm = TRUE),
    sd_g   < quantile(sd_g,   0.5, na.rm = TRUE)
  ) %>%
  arrange(sd_g)

typical_candidates
## # A tibble: 2 × 7
##   id     n_points mean_g    sd_g range_g lon_range lat_range
##   <chr>     <int>  <dbl>   <dbl>   <dbl>     <dbl>     <dbl>
## 1 184025       22  0.873 0.00159 0.00579      3.67      4.56
## 2 220361       59  0.849 0.105   0.362       12.7       4.53

Let \(\gamma_{it} \in (0,1)\) denote the movement persistence index estimated by the move persistence model for individual \(i\) at time \(t\). To summarise individual-level movement behaviour over the tracking period, we computed the following descriptive statistics of \(\gamma_{it}\).

\[ \operatorname{mean}_i(\gamma_t) \]

denoted as in the analysis, represents the average level of movement persistence for individual \(i\) across time. Values of \(\operatorname{mean}_i(\gamma_t)\) close to \(1\) indicate that the individual predominantly exhibits highly persistent, directional movement, consistent with migration-like behaviour. Values around \(0.5\) reflect intermediate persistence, suggesting mixed movement strategies. Lower values of \(\operatorname{mean}_i(\gamma_t)\) indicate movement dominated by low persistence, characterized by slower and less directed motion.

\[ \operatorname{sd}_i(\gamma_t) \]

denoted as , quantifies the temporal variability of movement persistence for individual \(i\). Low values of \(\operatorname{sd}_i(\gamma_t)\) indicate relatively stable movement behaviour over time, whereas higher values reflect substantial fluctuations in persistence, corresponding to frequent transitions between more and less directed movement.

\[ \operatorname{range}_i(\gamma_t) = \max_t(\gamma_{it}) - \min_t(\gamma_{it}) \]

denoted as , captures the extent of behavioural contrast experienced by individual \(i\) during the observation period. A small range suggests a relatively homogeneous movement pattern, while a large range indicates that the individual experienced both highly persistent and weakly persistent movement phases.

Together, these statistics characterize individual movement behaviour in terms of its average persistence, temporal stability, and behavioural heterogeneity. They are used solely for descriptive interpretation and visualization, and do not imply discrete behavioural states. All behavioural groupings based on these summaries are intended to aid interpretation of the continuous movement persistence process estimated by the model.

# High persistence candidates (for visualization)

high_persistence_candidates <- behaviour_summary %>%
filter(
mean_g > quantile(mean_g, 0.75, na.rm = TRUE),
sd_g   < quantile(sd_g,   0.5,  na.rm = TRUE)
) %>%
mutate(
spatial_extent = lon_range + lat_range
) %>%
arrange(desc(spatial_extent))

high_persistence_candidates
## # A tibble: 7 × 8
##   id       n_points mean_g       sd_g range_g lon_range lat_range spatial_extent
##   <chr>       <int>  <dbl>      <dbl>   <dbl>     <dbl>     <dbl>          <dbl>
## 1 157790_a      255  0.928 0.0775     4.21e-1     35.9      14.3            50.2
## 2 139119        144  1.000 0          0           12.8      10.8            23.6
## 3 139127        120  0.951 0.134      7.12e-1      8.66      8.12           16.8
## 4 139121         81  1.000 0          0           12.2       2.67           14.9
## 5 172245         70  0.998 0.00000290 7.55e-6      3.44     11.3            14.7
## 6 172239         23  0.941 0.128      5.68e-1     12.6       1.68           14.3
## 7 172240         21  0.946 0.00195    5.96e-3      8.61      1.84           10.5
#Low persistence candidates (for visualization)
low_persistence_candidates <- behaviour_summary %>%
filter(
mean_g < quantile(mean_g, 0.25, na.rm = TRUE)
) %>%
arrange(sd_g)

low_persistence_candidates
## # A tibble: 8 × 7
##   id     n_points mean_g     sd_g  range_g lon_range lat_range
##   <chr>     <int>  <dbl>    <dbl>    <dbl>     <dbl>     <dbl>
## 1 184029       67  0.737 0.000205 0.000734     10.8       4.41
## 2 184027      259  0.784 0.139    0.741        24.8      10.7 
## 3 132074      137  0.766 0.190    0.848        17.6      13.1 
## 4 212419       67  0.683 0.198    0.631         9.41      4.35
## 5 203645      134  0.756 0.213    0.849        38.9      15.6 
## 6 220362       58  0.566 0.240    0.789        11.6       4.66
## 7 139125      141  0.752 0.286    0.970        23.4       4.70
## 8 220359       31  0.670 0.393    0.941         5.21      3.79
# Highly variable persistence candidates

variable_persistence_candidates <- behaviour_summary %>%
filter(
sd_g > quantile(sd_g, 0.75, na.rm = TRUE)
) %>%
arrange(desc(sd_g))

variable_persistence_candidates
## # A tibble: 8 × 7
##   id     n_points mean_g  sd_g range_g lon_range lat_range
##   <chr>     <int>  <dbl> <dbl>   <dbl>     <dbl>     <dbl>
## 1 220359       31  0.670 0.393   0.941      5.21      3.79
## 2 139125      141  0.752 0.286   0.970     23.4       4.70
## 3 172241      105  0.791 0.275   0.963      9.16      5.03
## 4 220362       58  0.566 0.240   0.789     11.6       4.66
## 5 203645      134  0.756 0.213   0.849     38.9      15.6 
## 6 131986       70  0.793 0.211   0.741     20.8       5.08
## 7 220364       74  0.793 0.209   0.829     24.0      18.2 
## 8 212419       67  0.683 0.198   0.631      9.41      4.35

Here, n_points represents the number of regularized latent track points used by the move persistence model.

list(
typical      = typical_candidates$id,
high_persist = high_persistence_candidates$id,
low_persist  = low_persistence_candidates$id,
variable     = variable_persistence_candidates$id
)
## $typical
## [1] "184025" "220361"
## 
## $high_persist
## [1] "157790_a" "139119"   "139127"   "139121"   "172245"   "172239"   "172240"  
## 
## $low_persist
## [1] "184029" "184027" "132074" "212419" "203645" "220362" "139125" "220359"
## 
## $variable
## [1] "220359" "139125" "172241" "220362" "203645" "131986" "220364" "212419"
# Representative individuals selected for visualization
# (illustrating different persistence patterns)

rep_ids <- c(
  "184025",  # typical
  "139119",  # high persistence
  "184029",  # low persistence
  "220359"   # variable
)

latent_rep <- latent_movement %>%
  dplyr::filter(id %in% rep_ids)

id_labels <- tibble(
  id = rep_ids,
  behaviour = c(
    "Typical",
    "High persistence",
    "Low persistence",
    "Highly variable"
  )
)

latent_rep <- latent_rep %>%
  left_join(id_labels, by = "id") %>%
  mutate(
    behaviour = factor(
      behaviour,
      levels = c(
        "Typical",
        "High persistence",
        "Low persistence",
        "Highly variable"
      )
    )
  )

# Transform to sf (WGS84)
latent_rep_sf <- latent_rep %>%
  sf::st_as_sf(coords = c("lon", "lat")) %>%
  sf::st_set_crs(4326)

# Projection
prj <- "+proj=laea +lat_0=0 +lon_0=-90 +datum=WGS84 +units=m +no_defs"

latent_rep_proj <- latent_rep_sf %>%
  sf::st_transform(crs = prj)

# In meters
bbox_proj <- sf::st_bbox(latent_rep_proj)

x_buffer <- 5e5   # 500 km buffer
y_buffer <- 5e5

xlim_zoom <- c(
  bbox_proj["xmin"] - x_buffer,
  bbox_proj["xmax"] + x_buffer
)

ylim_zoom <- c(
  bbox_proj["ymin"] - y_buffer,
  bbox_proj["ymax"] + y_buffer
)

world <- rnaturalearth::ne_countries(
  scale = "medium",
  returnclass = "sf"
)

world_proj <- world %>%
  sf::st_transform(crs = prj)

ggplot() +
  theme_bw() +
  geom_sf(
    data = world_proj,
    fill = "grey90",
    colour = "grey70",
    linewidth = 0.2
  ) +
  geom_sf(
    data = latent_rep_proj,
    aes(colour = g),
    size = 1,
    alpha = 0.9
  ) +
  scale_colour_viridis_c(
    name = expression(gamma[t]),
    limits = c(0, 1)
  ) +
  coord_sf(
    crs = prj,
    xlim = xlim_zoom,
    ylim = ylim_zoom,
    expand = FALSE
  ) +
  facet_wrap(~ behaviour + id, ncol = 2)

The overview map uses a shared spatial scale across individuals, which facilitates comparison of movement extent but compresses trajectories with smaller spatial ranges. To highlight individual-level movement geometry, there are additionally maps zoomed to the spatial extent of each track.

# Zoom in map
plot_one_shark_map <- function(
  latent_data,
  shark_id,
  behaviour_label,
  prj,
  buffer_km = 300
) {

  # subset one individual
  df_one <- latent_data %>%
    dplyr::filter(id == shark_id)

  # convert to sf (lon/lat)
  sf_one <- df_one %>%
    sf::st_as_sf(coords = c("lon", "lat")) %>%
    sf::st_set_crs(4326) %>%
    sf::st_transform(crs = prj)

  # bounding box in projected CRS (meters)
  bb <- sf::st_bbox(sf_one)

  buffer_m <- buffer_km * 1000

  xlim <- c(bb["xmin"] - buffer_m, bb["xmax"] + buffer_m)
  ylim <- c(bb["ymin"] - buffer_m, bb["ymax"] + buffer_m)

  # world map
  world_proj <- rnaturalearth::ne_countries(
    scale = "medium",
    returnclass = "sf"
  ) %>%
    sf::st_transform(crs = prj)

  # plot
  ggplot() +
    theme_bw() +
    geom_sf(
      data = world_proj,
      fill = "grey90",
      colour = "grey70",
      linewidth = 0.2
    ) +
    geom_sf(
      data = sf_one,
      aes(colour = g),
      size = 1.2
    ) +
    scale_colour_viridis_c(
      name = expression(gamma[t]),
      limits = c(0, 1)
    ) +
    coord_sf(
      crs = prj,
      xlim = xlim,
      ylim = ylim,
      expand = FALSE
    ) +
    labs(
      title = behaviour_label,
      subtitle = paste("ID:", shark_id)
    )
}

prj <- "+proj=laea +lat_0=0 +lon_0=-90 +datum=WGS84 +units=m +no_defs"

# Typical 
plot_one_shark_map(
  latent_data = latent_movement,
  shark_id = "184025",
  behaviour_label = "Typical",
  prj = prj,
  buffer_km = 300
)

# High persistence
plot_one_shark_map(
  latent_data = latent_movement,
  shark_id = "139119",
  behaviour_label = "High persistence",
  prj = prj,
  buffer_km = 500   
)

# Low persistence
plot_one_shark_map(
  latent_data = latent_movement,
  shark_id = "184029",
  behaviour_label = "Low persistence",
  prj = prj,
  buffer_km = 200
)

# Highly variable 
plot_one_shark_map(
  latent_data = latent_movement,
  shark_id = "220359",
  behaviour_label = "Highly variable",
  prj = prj,
  buffer_km = 200
)

# Check 
plot_one_shark_map(
  latent_data = latent_movement,
  shark_id = "203646_b",
  behaviour_label = "/",
  prj = prj,
  buffer_km = 500   
)